library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
library(lubridate)
library(ggplot2)
# Import the raw rat-sighting CSV. The strings "NA", "" and "0" are all read
# as missing values, and the column names are then normalised by
# janitor::clean_names().
# NOTE(review): "0" is treated as NA in every column, not only one of them --
# confirm that this is intended.
rat <- janitor::clean_names(
  read_csv("./data/rat_sighting_raw1.csv", na = c("NA", "", "0"))
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 259233 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Created Date, Closed Date, Incident Address, City, Landmark, Facili...
## dbl (2): Unique Key, Incident Zip
## lgl (7): Vehicle Type, Taxi Company Borough, Taxi Pick Up Location, Bridge H...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column-by-column overview (name, type, leading values) of the imported data.
rat |>
  glimpse()
## Rows: 259,233
## Columns: 18
## $ unique_key               <dbl> 63575470, 63572156, 63580575, 63583963, 63573…
## $ created_date             <chr> "2024 Dec 31 10:58:24 PM", "2024 Dec 31 10:49…
## $ closed_date              <chr> "2025 Jan 03 09:32:40 AM", "2025 Jan 03 09:32…
## $ incident_zip             <dbl> 10475, 10475, 10029, 11215, 10021, 11101, 114…
## $ incident_address         <chr> "3550 BIVONA STREET", "3550 BIVONA STREET", "…
## $ city                     <chr> "BRONX", "BRONX", "NEW YORK", "BROOKLYN", "NE…
## $ landmark                 <chr> "BIVONA STREET", "BIVONA STREET", "PLEASANT A…
## $ facility_type            <chr> "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/…
## $ community_board          <chr> "12 BRONX", "12 BRONX", "11 MANHATTAN", "06 B…
## $ borough                  <chr> "BRONX", "BRONX", "MANHATTAN", "BROOKLYN", "M…
## $ park_borough             <chr> "BRONX", "BRONX", "MANHATTAN", "BROOKLYN", "M…
## $ vehicle_type             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ taxi_company_borough     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ taxi_pick_up_location    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ bridge_highway_name      <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ bridge_highway_direction <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ road_ramp                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ bridge_highway_segment   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Auto-print the imported tibble to inspect its first rows.
rat
## # A tibble: 259,233 × 18
##    unique_key created_date       closed_date incident_zip incident_address city 
##         <dbl> <chr>              <chr>              <dbl> <chr>            <chr>
##  1   63575470 2024 Dec 31 10:58… 2025 Jan 0…        10475 3550 BIVONA STR… BRONX
##  2   63572156 2024 Dec 31 10:49… 2025 Jan 0…        10475 3550 BIVONA STR… BRONX
##  3   63580575 2024 Dec 31 10:28… 2025 Jan 0…        10029 293 PLEASANT AV… NEW …
##  4   63583963 2024 Dec 31 06:15… 2025 Jan 0…        11215 441 13 STREET    BROO…
##  5   63573818 2024 Dec 31 06:10… 2024 Dec 3…        10021 144 EAST   74 S… NEW …
##  6   63572161 2024 Dec 31 06:04… 2024 Dec 3…        11101 21-06 45 AVENUE  LONG…
##  7   63578888 2024 Dec 31 05:26… 2025 Jan 0…        11432 160-19 GRAND CE… JAMA…
##  8   63577241 2024 Dec 31 05:05… 2024 Dec 3…        11233 68 SOMERS STREET BROO…
##  9   63583968 2024 Dec 31 04:47… 2025 Jan 0…        11377 47-34 45 STREET  WOOD…
## 10   63572155 2024 Dec 31 04:31… 2024 Dec 3…        10456 320 EAST  167 S… BRONX
## # ℹ 259,223 more rows
## # ℹ 12 more variables: landmark <chr>, facility_type <chr>,
## #   community_board <chr>, borough <chr>, park_borough <chr>,
## #   vehicle_type <lgl>, taxi_company_borough <lgl>,
## #   taxi_pick_up_location <lgl>, bridge_highway_name <lgl>,
## #   bridge_highway_direction <lgl>, road_ramp <lgl>,
## #   bridge_highway_segment <lgl>
# Convert created_date from text such as "2024 Dec 31 10:58:24 PM" into a
# date-time: year, abbreviated month name, day, then a 12-hour clock time
# with an AM/PM marker. closed_date is left untouched as character.
rat1 <- mutate(
  rat,
  created_date = parse_date_time(created_date, orders = "Y b d I:M:S p")
)

# Inspect the result; created_date should now print as <dttm>.
rat1
## # A tibble: 259,233 × 18
##    unique_key created_date        closed_date      incident_zip incident_address
##         <dbl> <dttm>              <chr>                   <dbl> <chr>           
##  1   63575470 2024-12-31 22:58:24 2025 Jan 03 09:…        10475 3550 BIVONA STR…
##  2   63572156 2024-12-31 22:49:24 2025 Jan 03 09:…        10475 3550 BIVONA STR…
##  3   63580575 2024-12-31 22:28:26 2025 Jan 02 07:…        10029 293 PLEASANT AV…
##  4   63583963 2024-12-31 18:15:58 2025 Jan 02 10:…        11215 441 13 STREET   
##  5   63573818 2024-12-31 18:10:15 2024 Dec 31 06:…        10021 144 EAST   74 S…
##  6   63572161 2024-12-31 18:04:50 2024 Dec 31 06:…        11101 21-06 45 AVENUE 
##  7   63578888 2024-12-31 17:26:40 2025 Jan 03 03:…        11432 160-19 GRAND CE…
##  8   63577241 2024-12-31 17:05:26 2024 Dec 31 05:…        11233 68 SOMERS STREET
##  9   63583968 2024-12-31 16:47:45 2025 Jan 03 03:…        11377 47-34 45 STREET 
## 10   63572155 2024-12-31 16:31:51 2024 Dec 31 04:…        10456 320 EAST  167 S…
## # ℹ 259,223 more rows
## # ℹ 13 more variables: city <chr>, landmark <chr>, facility_type <chr>,
## #   community_board <chr>, borough <chr>, park_borough <chr>,
## #   vehicle_type <lgl>, taxi_company_borough <lgl>,
## #   taxi_pick_up_location <lgl>, bridge_highway_name <lgl>,
## #   bridge_highway_direction <lgl>, road_ramp <lgl>,
## #   bridge_highway_segment <lgl>
# Monthly rat-sighting counts for 2019-2024.
#
# Steps:
#   1. keep only sightings whose created_date falls in 2019-2024 (inclusive);
#      rows whose created_date is NA are dropped by filter();
#   2. derive `year` and `month` (full month name, ordered factor);
#   3. count the sightings per year/month combination.
#
# The result has one row per year/month with the columns `year`, `month` and
# `rat_sighting_count`.
rat_cleaned <- rat1 |>
  filter(between(year(created_date), 2019, 2024)) |>
  mutate(
    year = year(created_date),
    month = month(created_date, label = TRUE, abbr = FALSE)
  ) |>
  group_by(year, month) |>
  # State the grouping of the result explicitly instead of relying on the
  # summarise() default: "drop_last" keeps the output grouped by `year`
  # (exactly what the default did) and silences the informational message.
  summarise(rat_sighting_count = n(), .groups = "drop_last")

# Inspect the summary table.
rat_cleaned
## # A tibble: 72 × 3
## # Groups:   year [6]
##     year month     rat_sighting_count
##    <dbl> <ord>                  <int>
##  1  2019 January                 1020
##  2  2019 February                 956
##  3  2019 March                   1126
##  4  2019 April                   1510
##  5  2019 May                     1863
##  6  2019 June                    1822
##  7  2019 July                    1824
##  8  2019 August                  1862
##  9  2019 September               1640
## 10  2019 October                 1405
## # ℹ 62 more rows
# Re-encode `month` as an ordered factor whose levels follow calendar order
# (January ... December, taken from base R's `month.name`). Any value that is
# not one of those names becomes NA.
rat_cleaned[["month"]] <- ordered(rat_cleaned[["month"]], levels = month.name)

# Static chart: one panel per year showing the monthly number of sightings as
# a line with point markers, coloured by year.
ggplot(
  rat_cleaned,
  aes(x = month, y = rat_sighting_count, colour = factor(year))
) +
  # `group = year` joins the twelve months of one year into a single line
  # (month is a discrete axis). Line thickness is set with `linewidth`;
  # using `size` for lines is deprecated since ggplot2 3.4.0.
  geom_line(aes(group = year), linewidth = 1) +
  geom_point(size = 2) +
  facet_wrap(~ year, ncol = 2) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    title = "Monthly Rat Sightings in NYC (2019–2024)",
    x = "Month",
    y = "Number of Rat Sightings",
    color = "Year"
  ) +
  theme_minimal() +
  # Tilt the month labels so the full month names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive chart: monthly sightings drawn as lines with markers, with one
# colour per year, all years sharing a single set of axes.
rat_cleaned |>
  plot_ly(
    x = ~month,
    y = ~rat_sighting_count,
    color = ~ factor(year),
    type = "scatter",
    mode = "lines+markers"
  ) |>
  layout(
    title = "Monthly Rat Sightings in NYC (2019–2024)",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Rat Sightings")
  )